
library(tidyverse)
library(rio)
library(cregg)
library(patchwork)

# set replication folder as working directory
setwd("~replication")

load("data_genderedcost_long.rdata")

# only include completed answers - and answers given before deadline
# 2021-12-20 21:49:52 was the last response within the time frame
df_long <- df_long %>% 
  filter(SurveyStatus==2)

df_long <- df_long %>% 
  filter(SurveyEndTime<="2021-12-20 21:49:52")

### Willingness to pay and willingness to work
## create the right reference category for attributes

#### WILLINGNESS TO PAY ####

## create the right reference groups for attributes
df_long <- df_long %>%
  mutate(position = factor(position, levels = c("Common Member",
                                                "Committee chair - small influence",
                                                "Committee chair - large influence")),
         remuneration = factor(remuneration, levels = c("Normal remuneration",
                                                        "10% lower remuneration",
                                                        "10% higher remuneration")),
         workload = factor(workload, levels = c("Normal workload",
                                                "10% lower workload",
                                                "10% higher workload")),
         work_environment = factor(work_environment, levels = c("Several experienced harassment",
                                                                "Characterized by mutual respect",
                                                                "Several experienced sexism and harassment")))

#### bootstrap confidence intervals
# create vectors to store estimates
pay_women_sexism <- NULL
pay_men_sexism <- NULL
pay_gap_sexism <- NULL

work_women_sexism <- NULL
work_men_sexism <- NULL
work_gap_sexism <- NULL

# set seed for reproducibility
set.seed(3450) # Allerød's zipcode
for(i in 1:1000){
  
  # create bootstrap samples from candidates stratified by gender
  df_long_women <- df_long %>% filter(sex=="Woman")
  df_long_men <- df_long %>% filter(sex=="Man")
  
  bootstrap_sample_women <- sample_n(df_long_women, nrow(df_long_women), replace = TRUE)
  bootstrap_sample_men <- sample_n(df_long_men, nrow(df_long_men), replace = TRUE)
  
  # estimate subgroup AMCEs between attribute levels and the reference category
  amces_women <- amce(data=filter(bootstrap_sample_women), # data
                      choice ~ work_environment+workload+remuneration+position, # formula
                      id = ~id)
  
  amces_men <- amce(data=filter(bootstrap_sample_men), # data
                    choice ~ work_environment+workload+remuneration+position, # formula
                    id = ~id)
  
  #### WILLINGNESS TO PAY
  # harassment vs. sexism -- express as 1 percent of remuneration based on remuneration estimates
  pay_women_sexism[i] <- amces_women$estimate[3]/(0.5*(-amces_women$estimate[8]/0.1+amces_women$estimate[9]/0.1))
  pay_men_sexism[i] <- amces_men$estimate[3]/(0.5*(-amces_men$estimate[8]/0.1+amces_men$estimate[9]/0.1))
  pay_gap_sexism[i] <- pay_women_sexism[i]-pay_men_sexism[i] 

  #### WILLINGNESS TO WORK
  # harassment vs. sexism -- express as 1 percent of work load based on work load estimates
  work_women_sexism[i] <- amces_women$estimate[3]/(0.5*(-amces_women$estimate[5]/0.1+amces_women$estimate[6]/0.1))
  work_men_sexism[i] <- amces_men$estimate[3]/(0.5*(-amces_men$estimate[5]/0.1+amces_men$estimate[6]/0.1))
  work_gap_sexism[i] <- work_women_sexism[i]-work_men_sexism[i] 
}

### get sample estimate
df_long_women <- df_long %>% filter(sex=="Woman")
df_long_men <- df_long %>% filter(sex=="Man")

amces_women <- amce(data=filter(df_long_women), # data
                    choice ~ work_environment+workload+remuneration+position, # formula
                    id = ~id)

amces_men <- amce(data=filter(df_long_men), # data
                  choice ~ work_environment+workload+remuneration+position, # formula
                  id = ~id)

##########################
### WILLINGNESS TO PAY ###
##########################

# create dataframe with subgroup estimates for men and womens willingness to pay
pay_est <- data.frame(label = c(
  rep("Several experienced harassment",2),
  rep("Several experienced sexism and harassment",2)),
  sex = c("Women", "Men", "Women", "Men"),
  estimate = c(0,0,
               amces_women$estimate[3]/(0.5*(-amces_women$estimate[8]/0.1+amces_women$estimate[9]/0.1)),
               amces_men$estimate[3]/(0.5*(-amces_men$estimate[8]/0.1+amces_men$estimate[9]/0.1))),
  ci_lower = c(0,0,
               quantile(pay_women_sexism, probs = 0.085),
               quantile(pay_men_sexism, probs = 0.085)),
  ci_upper = c(0,0,
               quantile(pay_women_sexism, probs = 0.915),
               quantile(pay_men_sexism, probs = 0.915)),
  scale = "Willingness to pay")


### Gender differences in willingness to pay
women_sexism_pay <- amces_women$estimate[3]/(0.5*(-amces_women$estimate[8]/0.1+amces_women$estimate[9]/0.1))
men_sexism_pay <- amces_men$estimate[3]/(0.5*(-amces_men$estimate[8]/0.1+amces_men$estimate[9]/0.1))

# create data frame with estimates
pay_dif <- data.frame(label = c("Several experienced harassment",
                                   "Several experienced sexism and harassment"),
                         estimate = c(NA,
                                      women_sexism_pay-men_sexism_pay),
                         ci_lower = c(NA,
                                      quantile(pay_gap_sexism, probs = 0.025)),
                         ci_upper = c(NA,
                                      quantile(pay_gap_sexism, probs = 0.975)),
                         scale = "Willingness to pay")

## Figure 6's top left side
mm_pay <- pay_est %>%
  ggplot(data=., aes(y = label, x = estimate, color = sex, shape = sex)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_point(aes(shape = sex),
             position = position_dodge2(width = 0.7)) +
  geom_linerange(aes(xmin=ci_lower,
                     xmax=ci_upper),
                 position = position_dodge2(width = 0.7)) +
  xlab("Willingness to pay (pct. of remuneration)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.8,0.8,0.05), breaks = seq(-0.8,0.8,0.05)) +
  theme_bw() +
  coord_cartesian(xlim = c(-0.25,0.25)) +
  scale_color_grey("", start = 0.1, end = 0.65) +
  scale_shape_discrete("") +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~scale)

## Figure 6's top right side
mm_dif_pay <- pay_dif %>%
  ggplot(data=., aes(x = estimate, y = label)) +
  geom_point(shape = 16, color = "black") +
  geom_linerange(aes(xmin=ci_lower,
                     xmax=ci_upper), color = "black") +
  xlab("Differences\n(women - men)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.8,0.8,0.1), breaks = seq(-0.8,0.8,0.1)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  theme_bw() +
  coord_cartesian(xlim = c(-0.2,0.2)) +
  scale_color_grey("", start = 0.1, end = 0.65) +
  scale_shape_discrete("") +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~scale)


###########################
### WILLINGNESS TO WORK ###
###########################

# create dataframe with subgroup estimates for men and womens willingness to work
work_est <- data.frame(label = c(
  rep("Several experienced harassment",2),
  rep("Several experienced sexism and harassment",2)),
  sex = c("Women", "Men", "Women", "Men"),
  estimate = c(0,0,
               amces_women$estimate[3]/(0.5*(-amces_women$estimate[5]/0.1+amces_women$estimate[6]/0.1)),
               amces_men$estimate[3]/(0.5*(-amces_men$estimate[5]/0.1+amces_men$estimate[6]/0.1))),
  ci_lower = c(0,0,
               quantile(work_women_sexism, probs = 0.085),
               quantile(work_men_sexism, probs = 0.085)),
  ci_upper = c(0,0,
               quantile(work_women_sexism, probs = 0.915),
               quantile(work_men_sexism, probs = 0.915)),
  scale = "Willingness to work")


### Gender differences in willingness to work
women_sexism_work <- amces_women$estimate[3]/(0.5*(-amces_women$estimate[5]/0.1+amces_women$estimate[6]/0.1))
men_sexism_work <- amces_men$estimate[3]/(0.5*(-amces_men$estimate[5]/0.1+amces_men$estimate[6]/0.1))

work_dif <- data.frame(label = c("Several experienced harassment",
                                    "Several experienced sexism and harassment"),
                          estimate = c(NA,
                                       women_sexism_work-men_sexism_work),
                          ci_lower = c(NA,
                                       quantile(work_gap_sexism, probs = 0.025)),
                          ci_upper = c(NA,
                                       quantile(work_gap_sexism, probs = 0.975)),
                          scale = "Willingness to work")

## Figure 6's bottom left side
mm_work <- work_est %>% 
  ggplot(data=., aes(y = label, x = estimate, color = sex, shape = sex)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  geom_point(aes(shape = sex),
             position = position_dodge2(width = 0.7)) +
  geom_linerange(aes(xmin=ci_lower,
                     xmax=ci_upper),
                 position = position_dodge2(width = 0.7)) +
  xlab("Willingness to work (pct. of workload)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.8,0.8,0.05), breaks = seq(-0.8,0.8,0.05)) +
  theme_bw() +
  coord_cartesian(xlim = c(-0.25,0.25)) +
  scale_color_grey("", start = 0.1, end = 0.65) +
  scale_shape_discrete("") +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~scale)

## Figure 6's bottom right side
mm_dif_work <- work_dif %>%
  ggplot(data=., aes(x = estimate, y = label)) +
  geom_point(shape = 16, color = "black") +
  geom_linerange(aes(xmin=ci_lower,
                     xmax=ci_upper), color = "black") +
  xlab("Differences\n(women - men)") +
  ylab("") +
  scale_x_continuous(labels = seq(-0.8,0.8,0.1), breaks = seq(-0.8,0.8,0.1)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
  theme_bw() +
  coord_cartesian(xlim = c(-0.2,0.2)) +
  scale_color_grey("", start = 0.1, end = 0.65) +
  scale_shape_discrete("") +
  theme(legend.position = "none", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~scale)


### Combine all four plots
mm_pay + mm_dif_pay + mm_work + mm_dif_work + plot_layout(widths = c(3, 1))


ggsave("figure6.pdf", height = 3.7, width = 10)
ggsave("figure6.png", height = 3.7, width = 10, dpi = 600)
